package com.web.spider.youku.util;

import org.htmlcleaner.TagNode;
import org.htmlcleaner.XPatherException;

import java.util.regex.Pattern;

/**
 * @author cherrish
 * @name HtmlUtil
 * @date 2019/4/7 21:54
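 * @desc Static helpers that evaluate an XPath against an HtmlCleaner TagNode and extract
 *       either an attribute value or a regex-matched piece of the node's text.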
 */
public class HtmlUtil {

    /**
     * Gets the value of an attribute from the first tag matched by an XPath.
     *
     * @param rootNode node the XPath is evaluated against; may be {@code null}
     * @param xpath    XPath expression selecting the target tag; may be {@code null}
     * @param att      name of the attribute to read from the matched tag
     * @return the value returned by {@code TagNode.getAttributeByName(att)} for the first match,
     *         or {@code null} when {@code rootNode}/{@code xpath} is {@code null}, nothing matches,
     *         the first match is not a {@link TagNode}, or the XPath evaluation fails
     */
    public static String getAttributeByName(TagNode rootNode, String xpath, String att) {
        try {
            TagNode node = findFirstTagNode(rootNode, xpath);
            if (node != null) {
                return node.getAttributeByName(att);
            }
        } catch (XPatherException e) {
            // Best-effort lookup: report the failure and fall through to the null result.
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Extracts a value from the text of the first tag matched by an XPath, using a regular
     * expression compiled with {@link Pattern#DOTALL}.
     *
     * @param rootNode node the XPath is evaluated against; may be {@code null}
     * @param xpath    XPath expression selecting the target tag; may be {@code null}
     * @param regex    regular expression applied to the matched tag's text
     * @return whatever {@code RegexUtil.getPageInfoByRegex} returns for the matched tag's text,
     *         or the string {@code "0"} when {@code rootNode}/{@code xpath} is {@code null},
     *         nothing matches, the first match is not a {@link TagNode}, or the XPath
     *         evaluation fails
     * @throws java.util.regex.PatternSyntaxException if a tag is matched and {@code regex} is
     *         not a valid pattern
     */
    public static String getFieldByRegex(TagNode rootNode, String xpath, String regex) {
        String number = "0";
        try {
            TagNode node = findFirstTagNode(rootNode, xpath);
            if (node != null) {
                // DOTALL lets '.' span line breaks inside the tag's text.
                Pattern numberPattern = Pattern.compile(regex, Pattern.DOTALL);
                // NOTE(review): the trailing 0 is presumably a capture-group index; confirm in RegexUtil.
                number = RegexUtil.getPageInfoByRegex(node.getText().toString(), numberPattern, 0);
            }
        } catch (XPatherException e) {
            // Best-effort lookup: report the failure and keep the "0" default.
            e.printStackTrace();
        }
        return number;
    }

    /**
     * Evaluates the XPath and returns the first result if it is a tag node.
     *
     * <p>The result array is typed {@code Object[]}, so the first element is type-checked
     * before the cast instead of being cast blindly.
     *
     * @return the first matched {@link TagNode}, or {@code null} when an input is {@code null},
     *         there is no match, or the first match is not a {@link TagNode}
     * @throws XPatherException if the XPath evaluation fails
     */
    private static TagNode findFirstTagNode(TagNode rootNode, String xpath) throws XPatherException {
        if (rootNode == null || xpath == null) {
            return null;
        }
        Object[] matches = rootNode.evaluateXPath(xpath);
        if (matches != null && matches.length > 0 && matches[0] instanceof TagNode) {
            return (TagNode) matches[0];
        }
        return null;
    }
}
